系统发育与物种分布模型code
下载基因序列 _R
library(ape)
d <- read.csv("c:/Users/admin/Desktop/构架沙棘属系统发育树/gene.csv")
t <- c(paste0("hsal_",1:3),paste0("hgya_",1:3),paste0("hneu_",1:3),paste0("hste_",1:3),paste0("htib_",1:3),
paste0("hyun_",1:3),paste0("hsin_",1:3),paste0("htur_",1:3),paste0("hmon_",1:3),paste0("hcau_",1:3),
paste0("hfulu_",1:3),paste0("hcar_",1:2),paste0("hrha_",1),"triflora_1","umbellata_1","argentea_1")
for(i in 1:39){
sal1 <- d[i,-1] %>% as.character()
z = read.GenBank(sal1 )
route <- paste0("c:/Users/admin/Desktop/",t[i],".fas")
write.dna(z,file =route,format = "fasta")
}
读取及写出序列
setwd("c:/Users/admin/Desktop/构架沙棘属系统发育树/下载序列/")
library(ape)
t1 <- read.FASTA("./hcar_1.fas")
t2 <- read.FASTA("./hcar_2.fas")
names(t1) <- c(paste0("a",1:5))
nn <- (t1[1],t2[1])
write.dna(nn,file ="../try1.fas",format = "fasta")
构建及可视化系统发育树
setwd("c:/Users/admin/Desktop/构架沙棘属系统发育树/下载序列/")
library(ape)
t1 <- read.FASTA("./hcar_1.fas")
t2 <- read.FASTA("./hcar_2.fas")
names(t1) <- c(paste0("a",1:5))
nn <- (t1[1],t2[1])
write.dna(nn,file ="../try1.fas",format = "fasta")
library(ape)
library(treeio)
library(ggtree)
library(treeio)
da1 <- read.nexus(file = 'c:/Users/admin/Desktop/mcc.species(1).txt')
beast <- read.beast('c:/Users/admin/Desktop/mcc.species(1).txt')
beast@data$posterior
keepers <- c("umbellata", "triflora", "argentea",
"hste", "hneu", "hsal","hgya","htib","hcar")
da2 <- ape::drop.tip(phy = beast@phylo,tip = beast@phylo$tip.label[beast@phylo$tip.label %in% keepers])
beast@data$posterior %>% length()
da2$
plot(da2)
beast@phylo$tip.label %>% length()
ggtree(beast) + geom_tiplab(geom = "text",size = 2.5) +
geom_nodepoint() + geom_n
cophenetic.phylo(da2)
dax <- read.nexus(file = 'c:/Users/admin/Desktop/mcc.species(1).txt')
da2$edge
da2$edge.length
da2$Nnode
da2$tip.label <- toupper(da2$tip.label)
p1 <- ggtree(da2) + theme_tree2()
p1 + scale_x_continuous(breaks = c(0,1:7),labels = abs)
p2 = revts(p1) + scale_x_continuous(breaks = c(-6:0),labels = paste0(6:0,"Ma"))
p2 + geom_tiplab(geom = "text",size = 2.5) +geom_nodepoint()
getwd()
png("鼠李沙棘6.png",width =2500, height = 1000,res =300)
p2 + geom_tiplab(geom = "text",size = 2.5) +geom_nodepoint()
dev.off()
p3 <- p2 + geom_tiplab(geom = "text",size = 2.5) +geom_nodepoint()
p3
p3 + geom_tippoint()
p3 + geom_tiplab()
p3 + geom_text(aes(label=node), hjust=-.3)
p3 +geom_tiplab() +
geom_hilight(node=11, fill="gold") +
geom_hilight(node=13, fill="purple")
p3 +
geom_tiplab() +
geom_cladelabel(node=13, label="Some random clade",
color="red2", offset=.8, align=TRUE)
p3 + geom_tiplab() +
geom_taxalink("E", "H", color="blue3") +
geom_taxalink("C", "G", color="orange2", curvature=-.9)
p3 +
theme_tree2() +
geom_tiplab(align=TRUE, linesize=.5) +
xlim(0, 10)
plot(da2)
time <- da2$edge.length[-c(1:7)] %>% round(2)
nodelabels(time, frame = "c", bg = "white")
add.scale.bar(length = 1 ,lwd = 1.5,labels = "1Ma")
plot(da1)
nodelabels(da1$node.label, adj = 0, frame = "n")
p3 <- p2 + geom_tiplab(geom = "text",size = 1) +geom_nodepoint()
p3 + geom_text(aes(label=node), hjust=-.3)
p3 + geom_text(aes(label=node), hjust=-.3) +
geom_hilight(node=15, fill="slateblue1") +
geom_hilight(node=12, fill="yellow")
p3 + geom_text(aes(label=node), hjust=-.3) +
geom_cladelabel(node=1, label="高加速山脉",
color="red2", offset=.8, align=TRUE)
将系统发育树映射到地图中
library(phytools)
library(mapdata)
library(viridis)
srha <- read.csv("./F1_SP/thrid_po/srhar.csv")[,3:4]
srha <- srha[sample(nrow(srha), 0.5*nrow(srha)),]
head(hthi)
hs <- rbind(scau,sflu,smon,srha,ssin,stur,syun) %>% data.frame()
hs1 <- cbind(hs$latitude,hs$longitude) %>% data.frame()
names(hs1) <- c("lat","long")
hs1 %>% as.matrix() -> hs1
va <- c(rep("Hcau",dim(scau)[1]),rep("Hflu",dim(sflu)[1]),
rep("Hmon",dim(smon)[1]),rep("Hrha",dim(srha)[1]),
rep("Hsin",dim(ssin)[1]),
rep("Htur",dim(stur)[1]),rep("Hyun",dim(syun)[1]))
row.names(hs1) <- va
coo <- c("#66CC007f","#9900FF7f","#9999FF","#FFCC007f","#FF66667f",
"#3333FF7f","#FF33007f")
cols <-setNames(coo,
da2$tip.label)
plot(da2)
obj<-phylo.to.map(da2,hs1,rotate =F ,database="worldHires",xlim = c(-7,130),ylim= c(20,70),
plot=FALSE,direction="downwards")
plot(obj,direction="downwards",colors=cols,cex.points=c(0,1),rotate = T,
lwd=c(3,1),ftype="i")
png("c:/Users/admin/Desktop/鼠李沙棘1.png",width =2500, height = 1000,res =300)
plot(obj,direction="downwards",colors=cols,cex.points=c(0,1),rotate = T,
lwd=c(3,1),ftype="i")
dev.off()
png("c:/Users/admin/Desktop/鼠李沙棘2.png",width =2500, height = 1000,res =300)
for(i in 1:Ntip(da2)){
spr<-hs1[which(rownames(hs1)==da2$tip.label[i]),]
mcp<-if(i==1) spr[chull(spr),] else rbind(mcp,spr[chull(spr),])
}
plot(obj,direction="downwards",colors=cols,cex.points=c(0,0.6),
lwd=c(3,1),ftype="i")
for(i in 1:Ntip(da2)){
ii<-which(rownames(mcp)==da2$tip.label[i])
polygon(mcp[ii,2:1],col=make.transparent(cols[da2$tip.label[i]],0.8),
border="darkgrey")
}
obj<-phylo.to.map(da2,mcp,database="worldHires",xlim = c(-7,130),ylim= c(24,70),
plot=FALSE,direction="downwards")
plot(obj,direction="downwards",colors=cols,cex.points=c(0,1),
lwd=c(3,1),ftype="i")
沿时间轴可视化系统发育树
p1 <- ggtree(da2) + theme_tree2()
p1 + scale_x_continuous(breaks = c(seq(0,21,2)),labels = abs)
p2 = revts(p1) + scale_x_continuous(breaks = c(seq(-20,0,2)),
labels = paste0(c(seq(20,0,-2)),"Ma"))
p2 + geom_tiplab(geom = "text",size = 4) +geom_nodepoint()
getwd()
png("鼠李沙棘6.png",width =3000, height = 1500,res =300)
p2 + geom_tiplab(geom = "text",size = 4) +geom_nodepoint()
dev.off()
计算系统发生距离
## 计算分化时间距离:
timedist <- cophenetic.phylo(da2)
基于ml树,重建年龄重建树
library(ape)
ctrl <- chronos.control(nb.rate.cat = 1)
chr.clock <- chronos(tree4, model = "discrete", control = ctrl)
cal <- makeChronosCalib(tree4, node = "root", age.min = 1,
age.max = 9, interactive = FALSE, soft.bounds = FALSE)
tree5 <- chronos(tree4,calibration = cal)
plot(tree5)
tree5$tip.label <- c("hcau" ,"htur","hflu","hrha", "hmon" ,"hsin" , "hyun")
计算生态位重叠与系统发育时间之间的 联系
over <- read.table("clipboard", header = T, sep = '\t') %>% as.data.frame()
nn <- names(over)[-1]
ov1 <- over[,-1]
row.names(ov1) <- nn
plot(da2)
png("鼠李沙棘3.png",width =3000, height = 1500,res =300)
x <- age.range.correlation(phy = da2, overlap = ov1, n = 100)
plot(x$age.range.correlation,xlab = "Age",ylab = "Overlap")
apply(x$MonteCarlo.replicates, 1, abline, lwd = 0.2, col = "grey50")
abline(x$linear.regression$coefficients,lwd = 2)
dev.off()
getwd()
da <- x$age.range.correlation %>% data.frame()
da2 <- x$MonteCarlo.replicates %>% data.frame()
names(da2) <- c("Intercept","Age")
x$linear.regression
x$age.range.correlation
p1 <- ggplot(data = da2,aes(x = Intercept)) + geom_histogram()+
geom_vline(xintercept = 0.17,
linetype = "longdash",color= "red",lwd =1) +
guides(fill = "none", alpha = FALSE) +
xlab("Intercept") + ylab("")+
annotate("text", x = 0.5, y = 7.5, label = "P.value = 0.34",
size = 5, color = "#22292F") +
ggtitle("Age-Overlap correlation from Monte Carlo") +
theme(plot.title = element_text(hjust = 0.5))+
theme_classic()
p2 <- ggplot(data = da2,aes(x = Age)) + geom_histogram()+
geom_vline(xintercept = -0.0189 ,
linetype = "longdash",color= "red",lwd =1) +
guides(fill = "none", alpha = FALSE) +
xlab("Slope") + ylab("")+
annotate("text", x = 0.01, y = 30, label = "P.value = 0.42",
size = 5, color = "#22292F") +
ggtitle("Age-Overlap correlation from Monte Carlo") +
theme(plot.title = element_text(hjust = 0.5))+
theme_classic()
png("鼠李沙棘6.png",width =3000, height = 1500,res =300)
p2
dev.off()
祖先气候耐受性重建
library(ape)
ctrl <- chronos.control(nb.rate.cat = 1)
chr.clock <- chronos(tree4, model = "discrete", control = ctrl)
cal <- makeChronosCalib(tree4, node = "root", age.min = 1,
age.max = 9, interactive = FALSE, soft.bounds = FALSE)
tree5 <- chronos(tree4,calibration = cal)
plot(tree5)
tree5$tip.label <- c("hcau" ,"htur","hflu","hrha", "hmon" ,"hsin" , "hyun")
library(phyloclim)
setwd("e:/sjdata/")
tifcau <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/scau/ense_mean.tif")
tifflu <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/sflu/ense_mean.tif")
tifmon <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/smon/smon_mean.tif")
tifrha <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/srha/ense_mean.tif")
tifsin <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/ssin/ense_mean.tif")
tiftur <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/stur/ense_mean.tif")
tifyun <- raster("./工作结果总结/鼠李沙棘种下生态位比较/sdm印证/syun/ense_mean.tif")
bio10 <- raster("./F2_ENVS/aseutif/bio10.tif")
bio12 <- raster("./F2_ENVS/aseutif/bio12.tif")
bio11 <- raster("./F2_ENVS/aseutif/bio11.tif")
da <- rbind(ssin,syun,scau,sflu,srha,smon,stur) %>% data.frame()
writeRaster(bio11,"E:/sjdata/temp/bio11.asc",format = "ascii",overwrite=TRUE)
mcp <- function (xy) {
library(rgdal)
xy <- as.data.frame(coordinates(xy))
coords.t <- chull(xy[, 1], xy[, 2])
xy.bord <- xy[coords.t, ]
xy.bord <- rbind(xy.bord[nrow(xy.bord), ], xy.bord)
return(SpatialPolygons(list(Polygons(list(Polygon(as.matrix(xy.bord))), 1))))
}
toasc1 <- function(occ,tif){
m1 <- mcp(occ)
m2 <- rgeos::gBuffer(m1,width = 0.1)
m3 <- raster::crop(tif,m2)
mm <- paste0(deparse(substitute(occ)),deparse(substitute(tif)))
rooute2 <- paste0("E:/sjdata/temp/",mm)
dir.create(rooute2)
rooute1 <- paste0(rooute2,"/",mm,".asc")
writeRaster(m3,rooute1,format = "ascii",overwrite=TRUE)
return( rooute1)
}
bio10xx <- toasc1(da,bio10)
bio12xx <- toasc1(da,bio12)
bio11xx <- toasc1(da,bio11)
toasc2 <- function(occ,tif){
m1 <- mcp(occ)
m2 <- rgeos::gBuffer(m1,width = 0.1)
m3 <- raster::crop(tif,m2)
mm <- paste0(deparse(substitute(occ)))
rooute2 <- paste0("E:/sjdata/temp/model/")
rooute1 <- paste0(rooute2,mm,".asc")
writeRaster(m3,rooute1,format = "ascii",overwrite=TRUE)
return( rooute2)
}
tifcau1 <- toasc2(scau,tifcau)
tifflu1 <- toasc2(sflu,tifflu)
tifrha1 <- toasc2(srha,tifrha)
tifmon1 <- toasc2(smon,tifmon)
tifsin1 <- toasc2(ssin,tifsin)
tifyun1 <- toasc2(syun,tifyun)
tiftur1 <- toasc2(stur,tiftur)
path_model = tifcau
path_bioclim = bio10cau
pno2 <- function(occ,tif1,tif2){
toasc1 <- function(occ,tif1){
m1 <- mcp(occ)
m2 <- rgeos::gBuffer(m1,width = 0.1)
m3 <- raster::crop(tif,m2)
mm <- paste0(deparse(substitute(occ)),deparse(substitute(tif)))
rooute2 <- paste0("E:/sjdata/temp/",mm)
dir.create(rooute2)
rooute1 <- paste0(rooute2,"/",mm,".asc")
writeRaster(m3,rooute1,format = "ascii",overwrite=TRUE)
return( rooute1)
}
toasc2 <- function(occ,tif2){
m1 <- mcp(occ)
m2 <- rgeos::gBuffer(m1,width = 0.1)
m3 <- raster::crop(tif,m2)
mm <- paste0(deparse(substitute(occ)),deparse(substitute(tif)))
rooute2 <- paste0("E:/sjdata/temp/",mm)
dir.create(rooute2)
rooute1 <- paste0(rooute2,"/",mm,".asc")
writeRaster(m3,rooute1,format = "ascii",overwrite=TRUE)
return( rooute2)
}
bio10cau <- toasc1(occ,tif1)
tifcau <- toasc2(occ,tif2)
path_model = tifcau
path_bioclim = bio10cau
cau <- pno(path_bioclim, path_model, subset = NULL,
bin_width = 1, bin_number = 40)
return(cau)
}
path_bioclim <- bio10xx
path_model <- "E:/sjdata/temp/model/"
bio10 <- pno(path_bioclim, path_model, subset = NULL,
bin_width = 1, bin_number = 40)
path_bioclim <- bio12xx
bio12va <- pno(path_bioclim, path_model, subset = NULL,
bin_width = 1, bin_number = 40)
path_bioclim <- bio11xx
bio11va <- pno(path_bioclim, path_model, subset = NULL,
bin_width = 1, bin_number = 40)
write.csv(bio12va,"bio12va.csv")
bio10 <- read.csv("./bio10va.csv")[,-1] %>% round(3)
bio12 <- read.csv("./bio12va.csv")[,-1] %>% round(3)
clim2 = bio11va %>% round(3) %>% data.frame()
names(clim2)
clim3 <- cbind(clim2[,1:2],clim2[,7],clim2[,3],clim2[,5],clim2[,4],
clim2[,6],clim2[,8]) %>% data.frame()
names(clim3) <- c("variable" ,"hcau" ,"htur","hflu","hrha", "hmon" ,"hsin" , "hyun")
clim4 <- cbind(clim2[,1:2],clim2[,7],clim2[,3],clim2[,5],clim2[,4],
clim2[,6],clim2[,8]) %>% data.frame()
names(clim4) <- c("variable" ,"hcau" ,"htur","hflu","hrha", "hmon" ,"hsin" , "hyun")
tail(clim4)
clim4$variable <- log(clim4$variable)
clim5 <- clim4[2:30,] %>% data.frame()
clim5
library(phyloclim)
plotPNO(x = clim4,
xlab = "Annual Mean Temperature (degree C)", wm = TRUE)
ac <- anc.clim( target =tree5 ,pno=clim3, n = 100)
acbio12 <- anc.clim( target =tree5 ,pno=clim5, n = 100,method = "ML")
acbio11 <- anc.clim( target =tree5 ,pno=clim3, n = 100,method = "ML")
plot(tree5)
tree5$tip.label
head(bio11va)
png("鼠李沙棘_bio12.png",width =1600, height = 1350,res =300)
plotAncClim(acbio12 , tipmode = 3,col = c("pink", "purple", "blue","red"),
lwd =1.5,cex =0.5,
ylab = "log(BIO12)-年降雨量",nchar = 4)
dev.off()
png("鼠李沙棘_bio10.png",width =1600, height = 1350,res =300)
plotAncClim(ac , tipmode = 3,col = c("pink", "purple", "blue","red"),
lwd =1.5,cex =0.5,
ylab = "BIO10-最热季节的平均温度",nchar = 4)
dev.off()
png("鼠李沙棘_bio11.png",width =1600, height = 1350,res =300)
plotAncClim(acbio11 , tipmode = 3,col = c("pink", "purple", "blue","red"),
lwd =1.5,cex =0.5,
ylab = "BIO11-最冷季节的平均温度",nchar = 4)
dev.off()
多序列建树--合并、比对、分发
setwd("c:/Users/admin/Desktop/构架沙棘属系统发育树/")
library(ape)
nmaess <- list.files("./下载序列2/")
library(seqinr)
nam2 <- nmaess[c(1,2,4,7,10,13,16,19,20,23,26,29,32,35,38,39)]
rout <- paste0("./下载序列2/",nam2[1])
da <- data.frame()
for(i in 1:16){
rout <- paste0("C:/Users/admin/Desktop/构架沙棘属系统发育树/下载序列2/",nam2[i])
daunlist(fa_seq2 ) <- rbind(da,rout)
}
da2 <- c(da)
names(da2) <- "nam"
library(Biostrings)
files <- da2$nam
fa_seq = lapply(files,readDNAStringSet)
gg1 <- c()
te <- function(x){
gg2 <- fa_seq[[x]][5]
gg1 <- c(gg1,gg2)
return(gg1)
}
ggg1 <- lapply(1:16,te)
fa_seq2 = do.call(c,ggg1)
fa_seq3 <- do.call(c,fa_seq2 )
writeXStringSet(fa_seq3, file="gg1.fa")
library(muscle)
g1 <- muscle::muscle(fa_seq3)
g2 <- muscle::muscle(fa_seq3)
g3 <- muscle::muscle(fa_seq3)
g4 <- muscle::muscle(fa_seq3)
g5 <- muscle::muscle(fa_seq3)
outfa <- function(x){
nn <- c(g1@unmasked[x],g2@unmasked[x],g3@unmasked[x],
g4@unmasked[x],g5@unmasked[x])
writeXStringSet(nn, file= paste0(nam2[x]))
}
lapply(1:16,outfa)
nnmm <- list.files("./bidui/")
da <- data.frame()
for(i in 1:15){
rout <- paste0("C:/Users/admin/Desktop/构架沙棘属系统发育树/bidui/",nnmm[i])
da <- rbind(da,rout)
}
files <- c(da)
names(files) <- "names"
library(Biostrings)
fa_seq = lapply(files,readDNAStringSet)
names(fa_seq$names) = nnmm
writeXStringSet(fa_seq$names, file="biduiclean.fa")